In this document we are looking at: 1) the best combination of time covariates model, both interaction and no interaction 2) the best combination of Factor loading model 3) the best overall combination

The results show 2 types of sensitivity to time interval 1) The Factor10_3 2FL model is not the best any more if we cut away the 2016-17. It is doing better later. 2) The Climatology is also doing horribly the first two years, with performance stabilizing after 3 years. 3) No question that we are doing better than climatology, but not as much.

  1. Even if PC2 does not contribute much alone, it does contribute when the model increases.
  2. Classic case of overfitting whit more than 2 PC compontents.

Some BLAS problems Inter1$Result[pred_1<70000&pred_1>10000,sqrt(mean((volume - pred_1)^2))] Because of problems with overflow.

1) Load Models

library(DemandForecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(data.table)
path = '/Users/Eirik/Desktop/Master2023/Output_data/output_demand_forecast_best_baseline/'
load(paste0(path, 'Year1.Rda'))
load(paste0(path, 'Climatology.Rda'))
load(paste0(path, 'Naive.Rda'))
load(paste0(path, 'No_inter_1.Rda'))
load(paste0(path, 'No_inter_2.Rda'))
load(paste0(path, 'No_inter_3.Rda'))
load(paste0(path, 'No_inter_4.Rda'))
load(paste0(path, 'No_inter_5.Rda'))
load(paste0(path, 'No_inter_6.Rda'))
load(paste0(path, 'No_inter_7.Rda'))
load(paste0(path, 'No_inter_8.Rda'))
load(paste0(path, 'Inter1.Rda'))
load(paste0(path, 'Inter2.Rda'))
load(paste0(path, 'Inter3.Rda'))
load(paste0(path, 'Inter4.Rda'))
load(paste0(path, 'Custom1.Rda'))
load(paste0(path, 'Custom2.Rda'))
load(paste0(path, 'Custom3.Rda'))
load(paste0(path, 'Custom4.Rda'))
load(paste0(path, 'Custom5.Rda'))
load(paste0(path, 'Custom6.Rda'))
load(paste0(path, 'Custom7.Rda'))
load(paste0(path, 'Custom8.Rda'))
load(paste0(path, 'Custom9.Rda'))
load(paste0(path, 'Custom10.Rda'))

load(paste0(path, 'Custom_lin1.Rda'))
load(paste0(path, 'Custom_lin2.Rda'))
load(paste0(path, 'Custom_lin3.Rda'))
load(paste0(path, 'Custom_lin4.Rda'))
load(paste0(path, 'Custom_lin5.Rda'))
load(paste0(path, 'Custom_lin6.Rda'))
load(paste0(path, 'Custom_lin7.Rda'))
load(paste0(path, 'Custom_lin8.Rda'))
load(paste0(path, 'Custom_lin9.Rda'))
load(paste0(path, 'Custom_lin10.Rda'))

load(paste0(path, 'Custom_comb1.Rda'))
load(paste0(path, 'Custom_comb2.Rda'))
load(paste0(path, 'Custom_comb3.Rda'))
load(paste0(path, 'Custom_comb4.Rda'))
load(paste0(path, 'Custom_comb5.Rda'))
load(paste0(path, 'Custom_comb6.Rda'))
load(paste0(path, 'Custom_comb7.Rda'))
load(paste0(path, 'Custom_comb8.Rda'))
load(paste0(path, 'Custom_comb9.Rda'))
load(paste0(path, 'Custom_comb10.Rda'))

load(paste0(path, 'Mean_temp1.Rda'))
load(paste0(path, 'Mean_temp2.Rda'))

load(paste0(path, 'Factor10_inter1.Rda'))
load(paste0(path, 'Factor10_inter2.Rda'))
load(paste0(path, 'Factor10_inter3.Rda'))
load(paste0(path, 'Factor10_inter4.Rda'))

2) Naive combination model

Naive= demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                        p_comps = 0,  reg_form = "volume ~ hour + season + w_day + week + month + year")

3) Best model no interaction

No_inter_1= demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 0,  reg_form = "volume ~ as.factor(hour)  + as.factor(w_day) + s(week) + month1 + month2 + season1 + season2 + year")

No_inter_2= demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 0,  reg_form = "volume ~ s(hour)  + as.factor(w_day) + s(week) + month1 + month2 + season1 + season2 + year")

No_inter_3= demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 0,  reg_form = "volume ~ as.factor(hour)  + as.factor(w_day) + as.factor(week) + month1 + month2 + season1 + season2 + year")

No_inter_4= demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 0,  reg_form = "volume ~ as.factor(hour)  + as.factor(w_day) + s(week) + s(month) + season1 + season2 + year", incl_climatology = TRUE)

No_inter_5= demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 0,  reg_form = "volume ~ as.factor(hour)  + as.factor(w_day) + s(week) + month1 + month2 + as.factor(season1)+ year")

##############
No_inter_6= demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 0,  reg_form = "volume ~ s(hour)  + as.factor(w_day) + s(week) + s(month) + season1 + season2 + year")

No_inter_7= demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 0,  reg_form = "volume ~ as.factor(hour)  + as.factor(w_day) + as.factor(week) +s(month) + season1 + season2 + year")

No_inter_8= demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 0,  reg_form = "volume ~ as.factor(hour)  + as.factor(w_day) + s(week) + s(month) + as.factor(season1)+ year")
plot(No_inter_1$Result[,sqrt(mean((volume - pred_1)^2)), keyby = init_date], type = 'l', ylim = c(2000, 13700))
lines(No_inter_2$Result[,sqrt(mean((volume - pred_1)^2)), keyby = init_date])
lines(No_inter_3$Result[,sqrt(mean((volume - pred_1)^2)), keyby = init_date])
lines(No_inter_4$Result[,sqrt(mean((volume - pred_1)^2)), keyby = init_date], col = 'red')
lines(No_inter_5$Result[,sqrt(mean((volume - pred_1)^2)), keyby = init_date])
lines(No_inter_6$Result[,sqrt(mean((volume - pred_1)^2)), keyby = init_date])
lines(No_inter_5$Result[,sqrt(mean((volume - pred_1)^2)), keyby = init_date])
lines(No_inter_7$Result[,sqrt(mean((volume - pred_1)^2)), keyby = init_date])
lines(Naive$Result[,sqrt(mean((volume - pred_1)^2)), keyby = init_date])
lines(Climatology$Result[,sqrt(mean((volume - clima_pred)^2)), keyby = init_date])

plot(No_inter_1$Result[,sqrt(mean((volume - pred_1)^2)), keyby = month(init_date)], type = 'l', ylim = c(2000, 13700))
lines(No_inter_2$Result[,sqrt(mean((volume - pred_1)^2)), keyby = month(init_date)])
lines(No_inter_3$Result[,sqrt(mean((volume - pred_1)^2)), keyby = month(init_date)])
lines(No_inter_4$Result[,sqrt(mean((volume - pred_1)^2)), keyby = month(init_date)], col = 'red')
lines(No_inter_5$Result[,sqrt(mean((volume - pred_1)^2)), keyby = month(init_date)])
lines(No_inter_6$Result[,sqrt(mean((volume - pred_1)^2)), keyby = month(init_date)])
lines(No_inter_7$Result[,sqrt(mean((volume - pred_1)^2)), keyby = month(init_date)])
lines(No_inter_8$Result[,sqrt(mean((volume - pred_1)^2)), keyby = month(init_date)])
lines(Naive$Result[,sqrt(mean((volume - pred_1)^2)), keyby = month(init_date)])
lines(Climatology$Result[,sqrt(mean((volume - clima_pred)^2)), keyby = month(init_date)])

No_inter_1$Result[,sqrt(mean((volume - pred_1)^2))]
## [1] 3264.242
No_inter_2$Result[,sqrt(mean((volume - pred_1)^2))] 
## [1] 3268.487
No_inter_3$Result[,sqrt(mean((volume - pred_1)^2))] 
## [1] 3262.578
No_inter_4$Result[,sqrt(mean((volume - pred_1)^2))] #Best s(month)
## [1] 3255.394
No_inter_5$Result[,sqrt(mean((volume - pred_1)^2))]
## [1] 3263.506
No_inter_6$Result[,sqrt(mean((volume - pred_1)^2))]
## [1] 3259.662
No_inter_7$Result[,sqrt(mean((volume - pred_1)^2))]
## [1] 3261.279
No_inter_8$Result[,sqrt(mean((volume - pred_1)^2))]
## [1] 3255.397
Naive$Result[,sqrt(mean((volume - pred_1)^2))]
## [1] 7023.913
Climatology$Result[,sqrt(mean((volume - clima_pred)^2))]
## [1] 4114.535

4) Interaction models

Inter1 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, 
                         train_y = 5, p_comps = 0,  
                         reg_form = "volume ~ as.factor(hour):as.factor(week)  + as.factor(w_day):as.factor(week) + s(week) + s(month) + season1 + season2 + year")

Inter2 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 0,  reg_form = "volume ~ as.factor(hour):as.factor(month)  + as.factor(w_day):as.factor(week) + s(week) + s(month) + season1 + season2 + year")

Inter3 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 0,  reg_form = "volume ~ as.factor(hour):as.factor(week)  + as.factor(w_day):as.factor(month) + s(week) + s(month) + season1 + season2 + year") #Have run

Inter4 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 0,  reg_form = "volume ~ as.factor(hour):as.factor(month)  + as.factor(w_day):as.factor(month) + s(week) + s(month) + season1 + season2 + year") #Have run
plot(Inter1$Result[,sqrt(mean((volume - pred_1)^2)), keyby = init_date], type = 'l', ylim = c(2000, 13700))
lines(Inter2$Result[,sqrt(mean((volume - pred_1)^2)), keyby = init_date])
lines(Inter3$Result[,sqrt(mean((volume - pred_1)^2)), keyby = init_date])
lines(Inter4$Result[,sqrt(mean((volume - pred_1)^2)), keyby = init_date], col = 'red')

lines(Naive$Result[,sqrt(mean((volume - pred_1)^2)), keyby = init_date])
lines(Climatology$Result[,sqrt(mean((volume - clima_pred)^2)), keyby = init_date])

plot(Inter1$Result[,sqrt(mean((volume - pred_1)^2)), keyby = month(init_date)], type = 'l', ylim = c(2000, 13700))
lines(Inter2$Result[,sqrt(mean((volume - pred_1)^2)), keyby = month(init_date)])
lines(Inter3$Result[,sqrt(mean((volume - pred_1)^2)), keyby = month(init_date)])
lines(Inter4$Result[,sqrt(mean((volume - pred_1)^2)), keyby = month(init_date)], col = 'red')

lines(Naive$Result[,sqrt(mean((volume - pred_1)^2)), keyby = month(init_date)])
lines(Climatology$Result[,sqrt(mean((volume - clima_pred)^2)), keyby = month(init_date)])

Inter1$Result[pred_1<70000&pred_1>10000,sqrt(mean((volume - pred_1)^2))]
## [1] 3199.394
#Inter1$Result[,sqrt(mean((volume - pred_1)^2))]
Inter2$Result[,sqrt(mean((volume - pred_1)^2))] 
## [1] 3211.302
Inter3$Result[,sqrt(mean((volume - pred_1)^2))] 
## [1] 3136.346
Inter4$Result[,sqrt(mean((volume - pred_1)^2))] #Best s(month)
## [1] 3126.086
Naive$Result[,sqrt(mean((volume - pred_1)^2))]
## [1] 7023.913
Climatology$Result[,sqrt(mean((volume - clima_pred)^2))]
## [1] 4114.535

5a) Single Factor loading models (splines)

Custom1 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 1, custom = 's(PC1)',  reg_form = "volume ~ 1", cores =48)
Custom2 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 2, custom = 's(PC2)',  reg_form = "volume ~ 1", cores =48)
Custom3 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 3, custom = 's(PC3)',  reg_form = "volume ~ 1", cores =48)
Custom4= demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps =4, custom = 's(PC4)',  reg_form = "volume ~ 1", cores =48)
Custom5 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 5, custom = 's(PC5)',  reg_form = "volume ~ 1", cores =48)
Custom6 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 6, custom = 's(PC6)',  reg_form = "volume ~ 1", cores =48)
Custom7 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 7, custom = 's(PC7)',  reg_form = "volume ~ 1", cores =48)
Custom8 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 8, custom = 's(PC8)',  reg_form = "volume ~ 1", cores =48)
Custom9 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 9, custom = 's(PC9)',  reg_form = "volume ~ 1", cores =48)
Custom10 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 10, custom = 's(PC10)',  reg_form = "volume ~ 1", cores =48)
plot(Custom1$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date], type = 'l', ylim = c(2000, 13700), col = 'red')
lines(Custom2$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom3$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom4$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom5$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom6$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom5$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom7$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom8$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom9$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom10$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Naive$Result[,sqrt(mean((volume - pred_1)^2)), keyby = init_date])
lines(Climatology$Result[,sqrt(mean((volume - clima_pred)^2)), keyby = init_date])

plot(Custom1$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)], type = 'l', ylim = c(2000, 13700), col = 'red')
lines(Custom2$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom3$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom4$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom5$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom6$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom7$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom8$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom9$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom10$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Naive$Result[,sqrt(mean((volume - pred_1)^2)), keyby = month(init_date)])
lines(Climatology$Result[,sqrt(mean((volume - clima_pred)^2)), keyby = month(init_date)])

Custom1$Result[,sqrt(mean((volume - pred_2)^2))]
## [1] 5323.503
Custom2$Result[,sqrt(mean((volume - pred_2)^2))] 
## [1] 7971.524
Custom3$Result[,sqrt(mean((volume - pred_2)^2))] 
## [1] 8485.076
Custom4$Result[,sqrt(mean((volume - pred_2)^2))] #Best s(month)
## [1] 8343.642
Custom5$Result[,sqrt(mean((volume - pred_2)^2))]
## [1] 8263.973
Custom6$Result[,sqrt(mean((volume - pred_2)^2))]
## [1] 7907.42
Custom7$Result[,sqrt(mean((volume - pred_2)^2))]
## [1] 7837.656
Custom8$Result[,sqrt(mean((volume - pred_2)^2))]
## [1] 8032.133
Custom9$Result[,sqrt(mean((volume - pred_2)^2))]
## [1] 8599.721
Custom10$Result[,sqrt(mean((volume - pred_2)^2))]
## [1] 7820.394
Naive$Result[,sqrt(mean((volume - pred_1)^2))]
## [1] 7023.913
Climatology$Result[,sqrt(mean((volume - clima_pred)^2))]
## [1] 4114.535

5b) Single Factor loading models (linear)

Custom_lin1 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 1, custom = 'PC1',  reg_form = "volume ~ 1", cores =48)
Custom_lin2 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 2, custom = 'PC2',  reg_form = "volume ~ 1", cores =48)
Custom_lin3 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 3, custom = 'PC3',  reg_form = "volume ~ 1", cores =48)
Custom_lin4= demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps =4, custom = 'PC4',  reg_form = "volume ~ 1", cores =48)
Custom_lin5 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 5, custom = 'PC5',  reg_form = "volume ~ 1", cores =48)
Custom_lin6 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 6, custom = 'PC6',  reg_form = "volume ~ 1", cores =48)
Custom_lin7 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 7, custom = 'PC7',  reg_form = "volume ~ 1", cores =48)
Custom_lin8 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 8, custom = 'PC8',  reg_form = "volume ~ 1", cores =48)
Custom_lin9 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 9, custom = 'PC9',  reg_form = "volume ~ 1", cores =48)
Custom_lin10 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 10, custom = 'PC10',  reg_form = "volume ~ 1", cores =48)
plot(Custom_lin1$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date], type = 'l', ylim = c(2000, 13700), col = 'red')
lines(Custom_lin2$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom_lin3$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom_lin4$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom_lin5$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom_lin6$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom_lin5$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom_lin7$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom_lin8$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom_lin9$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom_lin10$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Naive$Result[,sqrt(mean((volume - pred_1)^2)), keyby = init_date])
lines(Climatology$Result[,sqrt(mean((volume - clima_pred)^2)), keyby = init_date])

plot(Custom_lin1$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)], type = 'l', ylim = c(2000, 13700), col = 'red')
lines(Custom_lin2$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom_lin3$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom_lin4$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom_lin5$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom_lin6$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom_lin7$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom_lin8$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom_lin9$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom_lin10$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Naive$Result[,sqrt(mean((volume - pred_1)^2)), keyby = month(init_date)])
lines(Climatology$Result[,sqrt(mean((volume - clima_pred)^2)), keyby = month(init_date)])

Custom_lin1$Result[,sqrt(mean((volume - pred_2)^2))]
## [1] 5549.052
Custom_lin2$Result[,sqrt(mean((volume - pred_2)^2))] 
## [1] 8000.772
Custom_lin3$Result[,sqrt(mean((volume - pred_2)^2))] 
## [1] 8191.449
Custom_lin4$Result[,sqrt(mean((volume - pred_2)^2))] #Best s(month)
## [1] 8129.255
Custom_lin5$Result[,sqrt(mean((volume - pred_2)^2))]
## [1] 8239.265
Custom_lin6$Result[,sqrt(mean((volume - pred_2)^2))]
## [1] 7946.144
Custom_lin7$Result[,sqrt(mean((volume - pred_2)^2))]
## [1] 7974.365
Custom_lin8$Result[,sqrt(mean((volume - pred_2)^2))]
## [1] 8005.985
Custom_lin9$Result[,sqrt(mean((volume - pred_2)^2))]
## [1] 8018.849
Custom_lin10$Result[,sqrt(mean((volume - pred_2)^2))]
## [1] 8026.336
Naive$Result[,sqrt(mean((volume - pred_1)^2))]
## [1] 7023.913
Climatology$Result[,sqrt(mean((volume - clima_pred)^2))]
## [1] 4114.535

6) Mean temperature model

Mean_temp1 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 1, custom = 'hourly_mean_grid',  reg_form = "volume ~ 1")

Mean_temp2 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 1, custom = 's(hourly_mean_grid)',  reg_form = "volume ~ 1")

7a) Combinations of Increasing # of Factor loadings

Custom_comb1 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 10,  reg_form = "volume ~ 1", cores =48)
Custom_comb2 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 2, custom = 's(PC1) + s(PC2)',  reg_form = "volume ~ 1", cores =48)
Custom_comb3 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 3, custom = 's(PC1) + s(PC3)',  reg_form = "volume ~ 1", cores =48)
Custom_comb4= demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps =4, custom = 's(PC1) + s(PC4)',  reg_form = "volume ~ 1", cores =48)
Custom_comb5 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 5, custom = 's(PC1) + s(PC5)',  reg_form = "volume ~ 1", cores =48)
Custom_comb6 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 6, custom = 's(PC1) + s(PC6)',  reg_form = "volume ~ 1", cores =48)
Custom_comb7 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 7, custom = 's(PC1) + s(PC7)',  reg_form = "volume ~ 1", cores =48)
Custom_comb8 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 8, custom = 's(PC1) + s(PC8)',  reg_form = "volume ~ 1", cores =48)
Custom_comb9 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 9, custom = 's(PC1) + s(PC9)',  reg_form = "volume ~ 1", cores =48)
Custom_comb10 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 10, custom = 's(PC1) + s(PC10)',  reg_form = "volume ~ 1", cores =48)
plot(Custom_comb1$Result[,sqrt(mean((volume - pred_1)^2)), keyby = init_date], type = 'l', ylim = c(2000, 13700))
lines(Custom_comb1$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date], col = 'red')
lines(Custom_comb1$Result[,sqrt(mean((volume - pred_3)^2)), keyby = init_date])
lines(Custom_comb1$Result[,sqrt(mean((volume - pred_4)^2)), keyby = init_date])
lines(Custom_comb1$Result[,sqrt(mean((volume - pred_5)^2)), keyby = init_date])
lines(Custom_comb1$Result[,sqrt(mean((volume - pred_6)^2)), keyby = init_date])
lines(Custom_comb1$Result[,sqrt(mean((volume - pred_7)^2)), keyby = init_date])
lines(Custom_comb1$Result[,sqrt(mean((volume - pred_8)^2)), keyby = init_date])
lines(Custom_comb1$Result[,sqrt(mean((volume - pred_9)^2)), keyby = init_date])
lines(Custom_comb1$Result[,sqrt(mean((volume - pred_10)^2)), keyby = init_date])
lines(Naive$Result[,sqrt(mean((volume - pred_1)^2)), keyby = init_date])
lines(Climatology$Result[,sqrt(mean((volume - clima_pred)^2)), keyby = init_date], col = 'blue')

plot(Custom_comb1$Result[,sqrt(mean((volume - pred_1)^2)), keyby = month(init_date)], type = 'l', ylim = c(2000, 13700))
lines(Custom_comb1$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)], col = 'red')
lines(Custom_comb1$Result[,sqrt(mean((volume - pred_3)^2)), keyby = month(init_date)])
lines(Custom_comb1$Result[,sqrt(mean((volume - pred_4)^2)), keyby = month(init_date)])
lines(Custom_comb1$Result[,sqrt(mean((volume - pred_5)^2)), keyby = month(init_date)])
lines(Custom_comb1$Result[,sqrt(mean((volume - pred_6)^2)), keyby = month(init_date)])
lines(Custom_comb1$Result[,sqrt(mean((volume - pred_7)^2)), keyby = month(init_date)])
lines(Custom_comb1$Result[,sqrt(mean((volume - pred_8)^2)), keyby = month(init_date)])
lines(Custom_comb1$Result[,sqrt(mean((volume - pred_9)^2)), keyby = month(init_date)])
lines(Custom_comb1$Result[,sqrt(mean((volume - pred_10)^2)), keyby = month(init_date)])
lines(Naive$Result[,sqrt(mean((volume - pred_1)^2)), keyby = month(init_date)])
lines(Climatology$Result[,sqrt(mean((volume - clima_pred)^2)), keyby = month(init_date)], col = 'blue')

Custom_comb1$Result[,sqrt(mean((volume - pred_1)^2))] 
## [1] 8004.572
Custom_comb1$Result[,sqrt(mean((volume - pred_2)^2))] 
## [1] 5323.503
Custom_comb1$Result[,sqrt(mean((volume - pred_3)^2))] 
## [1] 5334.805
Custom_comb1$Result[,sqrt(mean((volume - pred_4)^2))]
## [1] 5777.554
Custom_comb1$Result[,sqrt(mean((volume - pred_5)^2))]
## [1] 6270.575
Custom_comb1$Result[,sqrt(mean((volume - pred_6)^2))]
## [1] 6948.271
Custom_comb1$Result[,sqrt(mean((volume - pred_7)^2))]
## [1] 6906.227
Custom_comb1$Result[,sqrt(mean((volume - pred_8)^2))]
## [1] 6942.777
Custom_comb1$Result[,sqrt(mean((volume - pred_9)^2))]
## [1] 6869.104
Custom_comb1$Result[,sqrt(mean((volume - pred_10)^2))]
## [1] 6862.388
Naive$Result[,sqrt(mean((volume - pred_1)^2))]
## [1] 7023.913
Climatology$Result[,sqrt(mean((volume - clima_pred)^2))]
## [1] 4114.535

7b) Combinations of 2 Factor loadings

plot(Custom_comb2$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date], type = 'l', ylim = c(2000, 13700), col = 'red')
lines(Custom_comb3$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom_comb4$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom_comb5$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom_comb6$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom_comb5$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom_comb7$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom_comb8$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom_comb9$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Custom_comb10$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date])
lines(Naive$Result[,sqrt(mean((volume - pred_1)^2)), keyby = init_date])
lines(Climatology$Result[,sqrt(mean((volume - clima_pred)^2)), keyby = init_date])

plot(Custom_comb2$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)], type = 'l', ylim = c(2000, 13700), col = 'red')
lines(Custom_comb3$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom_comb4$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom_comb5$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom_comb6$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom_comb7$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom_comb8$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom_comb9$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Custom_comb10$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)])
lines(Naive$Result[,sqrt(mean((volume - pred_1)^2)), keyby = month(init_date)])
lines(Climatology$Result[,sqrt(mean((volume - clima_pred)^2)), keyby = month(init_date)])

Custom_comb2$Result[,sqrt(mean((volume - pred_2)^2))] 
## [1] 5334.805
Custom_comb3$Result[,sqrt(mean((volume - pred_2)^2))] 
## [1] 5759.798
Custom_comb4$Result[,sqrt(mean((volume - pred_2)^2))] 
## [1] 5474.164
Custom_comb5$Result[,sqrt(mean((volume - pred_2)^2))]
## [1] 5545.834
Custom_comb6$Result[,sqrt(mean((volume - pred_2)^2))]
## [1] 5298.785
Custom_comb7$Result[,sqrt(mean((volume - pred_2)^2))]
## [1] 5328.911
Custom_comb8$Result[,sqrt(mean((volume - pred_2)^2))]
## [1] 5372.787
Custom_comb9$Result[,sqrt(mean((volume - pred_2)^2))]
## [1] 5389.642
Custom_comb10$Result[,sqrt(mean((volume - pred_2)^2))]
## [1] 5334.216
Naive$Result[,sqrt(mean((volume - pred_1)^2))]
## [1] 7023.913
Climatology$Result[,sqrt(mean((volume - clima_pred)^2))]
## [1] 4114.535

Final Best Demand Models

Factor10_inter1 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 10,  reg_form = "volume ~ as.factor(hour):as.factor(week)  + as.factor(w_day):as.factor(week) + s(week) + s(month) + season1 + season2 + year", cores = 48)

Factor10_inter2 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 10,  reg_form = "volume ~ as.factor(hour):as.factor(month)  + as.factor(w_day):as.factor(week) + s(week) + s(month) + season1 + season2 + year", cores = 48) 

Factor10_inter3 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 10,  reg_form = "volume ~ as.factor(hour):as.factor(week)  + as.factor(w_day):as.factor(month) + s(week) + s(month) + season1 + season2 + year", cores = 48) 

Factor10_inter4 = demand_forecast(X_mat, date_demand,forc_start = '2016-01-01', forc_end = '2023-01-01', pred_win = 30, pred_lag = 15, train_y = 5, 
                    p_comps = 10,  reg_form = "volume ~ as.factor(hour):as.factor(month)  + as.factor(w_day):as.factor(month) + s(week) + s(month) + season1 + season2 + year", cores = 48) #
plot(Factor10_inter3$Result[,sqrt(mean((volume - pred_2)^2)), keyby = init_date], type = 'l', ylim = c(1600, 8000))
lines(Factor10_inter3$Result[,sqrt(mean((volume - pred_3)^2)), keyby = init_date], col = 'red')
lines(Factor10_inter3$Result[,sqrt(mean((volume - pred_4)^2)), keyby = init_date])
lines(Factor10_inter3$Result[,sqrt(mean((volume - pred_5)^2)), keyby = init_date])
#lines(Factor10_inter3$Result[,sqrt(mean((volume - pred_6)^2)), keyby = init_date])
#lines(Factor10_inter3$Result[,sqrt(mean((volume - pred_7)^2)), keyby = init_date])
#lines(Factor10_inter3$Result[,sqrt(mean((volume - pred_8)^2)), keyby = init_date])
#lines(Factor10_inter3$Result[,sqrt(mean((volume - pred_9)^2)), keyby = init_date])
#lines(Factor10_inter3$Result[,sqrt(mean((volume - pred_10)^2)), keyby = init_date])

#lines(Naive$Result[,sqrt(mean((volume - pred_1)^2)), keyby = init_date])
lines(Climatology$Result[,sqrt(mean((volume - clima_pred)^2)), keyby = init_date], col = 'blue')

plot(Factor10_inter3$Result[,sqrt(mean((volume - pred_2)^2)), keyby = month(init_date)], type = 'l', ylim = c(2000, 6000))
lines(Factor10_inter3$Result[,sqrt(mean((volume - pred_3)^2)), keyby = month(init_date)], col = 'red')
lines(Factor10_inter3$Result[,sqrt(mean((volume - pred_4)^2)), keyby = month(init_date)])
lines(Factor10_inter3$Result[,sqrt(mean((volume - pred_5)^2)), keyby = month(init_date)])
lines(Factor10_inter3$Result[,sqrt(mean((volume - pred_6)^2)), keyby = month(init_date)])
#lines(Factor10_inter3$Result[,sqrt(mean((volume - pred_7)^2)), keyby = month(init_date)])
#lines(Factor10_inter3$Result[,sqrt(mean((volume - pred_8)^2)), keyby = month(init_date)])
#lines(Factor10_inter3$Result[,sqrt(mean((volume - pred_9)^2)), keyby = month(init_date)])
#lines(Factor10_inter3$Result[,sqrt(mean((volume - pred_10)^2)), keyby = month(init_date)])
#lines(Naive$Result[,sqrt(mean((volume - pred_1)^2)), keyby = month(init_date)])
lines(Climatology$Result[,sqrt(mean((volume - clima_pred)^2)), keyby = month(init_date)], col = 'blue')

Factor10_inter3$Result[,sqrt(mean((volume - pred_2)^2))]
## [1] 2754.813
Factor10_inter3$Result[,sqrt(mean((volume - pred_3)^2))] #Best 
## [1] 2643.16
Factor10_inter3$Result[,sqrt(mean((volume - pred_4)^2))] 
## [1] 2758.652
Factor10_inter3$Result[,sqrt(mean((volume - pred_5)^2))] 
## [1] 2833.004
Factor10_inter3$Result[,sqrt(mean((volume - pred_6)^2))]
## [1] 2790.928
Factor10_inter3$Result[,sqrt(mean((volume - pred_7)^2))]
## [1] 2795.672
Factor10_inter3$Result[,sqrt(mean((volume - pred_8)^2))]
## [1] 2804.58
Factor10_inter3$Result[,sqrt(mean((volume - pred_9)^2))]
## [1] 2787.645
Factor10_inter3$Result[,sqrt(mean((volume - pred_10)^2))]
## [1] 2817.735
Naive$Result[,sqrt(mean((volume - pred_1)^2))]
## [1] 7023.913
Climatology$Result[,sqrt(mean((volume - clima_pred)^2))]
## [1] 4114.535

Main plotting Function

Errorplot_factor_loading = function(data, plot_title = "Averaged Train and Test Error (2016-01:2023-01)", leg_place = 'topright', total_var = 11){
    
    test_pc = paste0('pred_', 1:total_var)
    train_pc = paste0('RMSE_train_', 1:total_var)
    test_plot = data[, lapply(.SD, function(x) sqrt(mean((volume - x)^2, na.rm=TRUE))), .SDcols = test_pc]
    #train_plot = data[, lapply(.SD, function(x) mean(x)), .SDcols = train_pc]
    Mrg = merge(Year1$Results[,.(date, hour, lead_time,N_train_1)], data, by = c("date", "hour", "lead_time"))

    train_plot = rep(NA, total_var)
    for (i in 1:total_var){
        vars = c("N_train_1", "lead_time", train_pc[i])
        Mrg_omit = na.omit(Mrg[,..vars])
        train_plot[i] = Mrg_omit[lead_time==361,sum(get(train_pc[i])*N_train_1, na.rm = FALSE)/ sum(N_train_1)]
    }
    print(test_plot)
    x = seq(1:length(test_plot))-1
    
    
    par(mar=c(5,5,5,5))
    # Test plot
    plot(x, test_plot, type = 'l', cex.lab = 1.3, cex.main = 1.5, pch = 16, col = "red", main = paste(plot_title), ylab = "Test Error", xlab = "Number of Factor Loadings in Model")  
    points(x, test_plot, pch = 16, col = "red")
    lines(x, Factor10_inter2$Results[, lapply(.SD, function(x) sqrt(mean((volume - x)^2, na.rm=TRUE))), .SDcols = test_pc], lty = 2, col = 'grey')
    lines(x, Factor10_inter1$Results[, lapply(.SD, function(x) sqrt(mean((volume - x)^2, na.rm=TRUE))), .SDcols = test_pc], lty = 2, col = 'grey')
    lines(x, Factor10_inter4$Results[, lapply(.SD, function(x) sqrt(mean((volume - x)^2, na.rm=TRUE))), .SDcols = test_pc], lty = 2, col = 'grey')
    par(new = TRUE)         #new axis
    
    # Train plot
    plot(x, train_plot, type = 'l', cex.lab = 1.7, col = "black", axes = FALSE, xlab = "", ylab = "")
    points(x, train_plot, pch = 16, col = "black")
    
    axis(side = 4, at = pretty(range(train_plot)))      
    mtext("Train Error", side = 4, line = 3, cex = 1.3)
    legend(leg_place, lty = 1, cex = 1, pch = 16, col = c('red', 'black', 'grey'), legend = c("Test Errors", "Training Errors", "Test Error (Alt Params)"))

    
}

test_pc = paste0('pred_', 1:11)
train_pc = paste0('RMSE_train_', 1:11)

Errorplot_factor_loading(Factor10_inter3$Results)
##      pred_1   pred_2  pred_3   pred_4   pred_5   pred_6   pred_7  pred_8
## 1: 3136.346 2754.813 2643.16 2758.652 2833.004 2790.928 2795.672 2804.58
##      pred_9  pred_10  pred_11
## 1: 2787.645 2817.735 2831.075

Errorplot_factor_loading(Factor10_inter3$Results[init_date>'2018-06-01',]) #Greys are not adjusted here
##    pred_1   pred_2   pred_3  pred_4   pred_5   pred_6   pred_7   pred_8  pred_9
## 1: 3151.5 2795.277 2679.202 2960.78 3051.231 2983.538 3007.066 3014.087 2987.16
##     pred_10  pred_11
## 1: 3021.007 3043.711

by_init =Factor10_inter4$Results[, lapply(.SD, function(x) sqrt(mean((volume - x)^2))), .SDcols = test_pc, by = init_date]

by_init[, lapply(.SD, function(x) mean(x)), .SDcols = test_pc]
##      pred_1  pred_2   pred_3   pred_4   pred_5  pred_6   pred_7   pred_8
## 1: 2980.571 2671.74 2577.268 2718.312 2775.574 2772.02 2766.674 2779.196
##      pred_9  pred_10  pred_11
## 1: 2755.004 2772.657 2782.223
Factor10_inter2$Results[, lapply(.SD, function(x) sqrt(mean((volume - x)^2))), .SDcols = test_pc]
##      pred_1   pred_2   pred_3   pred_4   pred_5   pred_6  pred_7   pred_8
## 1: 3211.302 2834.293 2710.348 2803.514 2873.844 2837.534 2845.35 2849.913
##      pred_9  pred_10  pred_11
## 1: 2827.387 2851.433 2873.582
Factor10_inter3$Results[, lapply(.SD, function(x) sqrt(mean((volume - x)^2))), .SDcols = test_pc]
##      pred_1   pred_2  pred_3   pred_4   pred_5   pred_6   pred_7  pred_8
## 1: 3136.346 2754.813 2643.16 2758.652 2833.004 2790.928 2795.672 2804.58
##      pred_9  pred_10  pred_11
## 1: 2787.645 2817.735 2831.075
Factor10_inter4$Results[, lapply(.SD, function(x) sqrt(mean((volume - x)^2))), .SDcols = test_pc]
##      pred_1   pred_2  pred_3   pred_4   pred_5   pred_6   pred_7   pred_8
## 1: 3126.086 2776.306 2665.76 2827.642 2899.386 2873.618 2873.623 2886.853
##      pred_9 pred_10  pred_11
## 1: 2860.965 2878.64 2893.528
#Factor10_inter1_real[, lapply(.SD, function(x) sqrt(mean((volume - x)^2,na.rm = TRUE))), .SDcols = test_pc]



#Skill by month

Factor10_inter3$Results[,  sqrt(mean((volume - pred_3)^2)), by = month(date)]
##     month       V1
##  1:     1 2872.760
##  2:     2 2628.866
##  3:     3 2978.028
##  4:     4 2926.502
##  5:     5 2423.092
##  6:     6 2271.231
##  7:     7 2347.422
##  8:     8 2169.563
##  9:     9 2287.631
## 10:    10 2625.023
## 11:    11 2489.908
## 12:    12 3353.914
plot(1 - (Factor10_inter3$Results[,  sqrt(mean((volume - pred_3)^2)), by = month(date)][,V1]/
              Climatology$Results[, sqrt(mean((volume - clima_pred)^2)), by = month(date)][,V1])^2,
     type = 'l', ylim = c(0, 0.8), xlab = 'Month', ylab = 'Skill Score',
     main = 'Skill of GAMFL2 by Month')
axis(month.abb, side = 4, labels = month.abb, at = 1:12)
legend('topright', lty = 1, cex = 1, pch = 16, col = c('black','red'), title = 'Skill relative to:', legend = c("Climatology", "Time Interaction"))

lines(1 - (Factor10_inter3$Results[,  sqrt(mean((volume - pred_3)^2)), by = month(date)][,V1]/
               Inter4$Result[, sqrt(mean((volume - pred_1)^2)), by = month(date)][,V1])^2, col = 'red')

plot(Factor10_inter3$Results[,  sqrt(mean((volume - pred_3)^2)), by = lead_time], type = 'l')

#Mrg[lead_time==361, sum(RMSE_train_3*N_train_1)/ sum(N_train_1)]
acf(Factor10_inter3$Result[,volume -pred_2])

Compare combination Models

plot(Factor10_inter3$Result[,volume])
lines(Factor10_inter3$Result[,volume], col = 'red')

plot(Factor10_inter3$Result[year(date) ==2016 & month ==3,volume], cex = 0.4)
#lines(Baseline_old$Result[year(date) ==2019& month ==3,pred_1], col = 'blue')
lines(Climatology$Result[year(date) ==2016& month ==3,clima_pred], col = 'blue')
#lines(Best$Result[year(date) ==2019& month ==3,pred_2], col = 'red', lty = 2)
lines(Factor10_inter3$Result[year(date) ==2016& month ==3,pred_3], col = 'red')

#lines(Naive$Result[year(date) ==2019& month ==3,pred_1], col = 'yellow', lty = 2)

plot(Factor10_inter3$Result[year(date) ==2021,volume -pred_3], col = 'purple', type = 'l')

acf(Factor10_inter3$Result[year(date) ==2021,volume -pred_3])

acf(Factor10_inter3$Result[,volume -pred_3])

Make a temperature plot with

  1. Loss for all years
  2. Compare temp for 2016 against mean temp over all year per hour, day
  3. Compare predictions from baseline model and best model for 2016
  4. Compare volume for that year

Skill by month Show difference to pure climatology model.

jensens inequality read up

Instead of doing the squared error directly

we sum the predictions for each day and compared for summed volume.

the other version has a rational for stake holders

if(file.exists("~/Desktop/Master2023/Data/")){
    prep = prep_demand_temp_data(include_na_volume = FALSE, 
                          path = "~/Desktop/Master2023/Data/")
}else{
    prep = prep_demand_temp_data(include_na_volume = FALSE)
}
## [1] "We are good to go!"
## [1] "Demand volume data from 2013-01-01 to 2023-02-17."
## [1] "Temperature data from 2013-01-01 to 2023-01-31."
## [1] "We have 88392 hourly observations at 462 grid points."
#
X_mat = prep$X_mat #Temperature field
date_demand = prep$date_demand
X_mean = rowMeans(X_mat) - 273.15
date_demand[, grid_mean_temp:= X_mean]
date_demand[, climatology:= mean(grid_mean_temp), by = .(hour, yday(date))]
plot(date_demand[year(date)==2016 & hour %in%12,.(date, climatology)], type = 'l', 
     ylim = c(-9, 16), ylab = 'Temperature (°C)', xlab = 'Date', main = 'Daily Mean Grid Temperature at 12:00 (lat 4-25, long 55-75)')
lines(date_demand[year(date)==2016 & hour %in%12,.(date, grid_mean_temp)], col = 'red')
legend('bottomright', lty = 1, cex = 0.7, col = c('red', 'black'), legend = c("2016 observed","Climatology"), title = 'Temperature')

date_demand[, climatology:= mean(grid_mean_temp), by = .(yday(date))]
date_demand[, mm_temp:= mean(grid_mean_temp), by = date]
plot(date_demand[year(date)==2016,.(date, mm_temp)], type = 'l', ylim = c(-8.7, 17))
lines(date_demand[year(date)==2016,.(date, climatology)], col = 'red')

yr = 2016
date_demand[, mean_vol:= mean(volume), by = date]
plot(  date_demand[year(date)==yr & hour %in%12,volume], col = 'black', ylim = c(30000, 65000), pch = 16, cex= 0.6, type = 'l')
lines(Factor10_inter3$Results[year(date)==yr & hour %in%12,pred_3], col = 'red')
lines(Factor10_inter3$Results[year(date)==yr & hour %in%12,pred_1], col = 'blue')

plot(date_demand[year(date)==yr &  wday(date)  %in% c(1) &hour %in%12,volume, by = date], col = 'black', ylim = c(30000, 65000), pch = 16, cex= 0.6, type = 'l')

#Reason for including weekday-term 
hour_x = 24
plot(date_demand[year(date)==yr &  wday(date)  %in% c(1) &hour %in%hour_x,volume, by = date], col = 'black', ylim = c(25000, 65000), pch = 16, cex= 0.6, type = 'l', main = paste0('Difference between weekdays at ', hour_x, ':00.Rda'))
for (i in 2:7){
    lines(date_demand[year(date)==yr &  wday(date)  %in% c(i) &hour %in%hour_x,volume, by = date], col = i)
}

# "2014-02-15" "2014-04-15" "2014-06-15" "2014-08-15"
# "2014-09-15" "2014-11-15" "2015-01-15" "2015-02-15" "2015-04-15" "2015-06-15"
# "2015-08-15" "2015-09-15" "2015-11-15" "2016-01-15" "2016-02-15" "2016-04-15"
# "2016-06-15" "2016-08-15" "2016-09-15" "2016-11-15" "2017-01-15" "2017-02-15"
# "2017-04-15" "2017-06-15" "2017-08-15" "2017-09-15" "2017-11-15" "2018-01-15"
# "2018-02-15" "2018-04-15" "2018-06-15" "2018-08-15" "2018-09-15" "2018-11-15"
# "2019-01-15" "2019-02-15" "2019-04-15" "2019-06-15" "2019-08-15" "2019-09-15"
# "2019-11-15" "2020-01-15" "2020-02-15" "2020-04-15" "2020-06-15" "2020-08-15"
# "2020-09-15" "2020-11-15" "2021-01-15" "2021-02-15" "2021-04-15" "2021-06-15"
# "2021-08-15" "2021-09-15"

By lead time

What quality of the PCA are we leveraging

Comparing with 10 training years